{
 "metadata": {
  "name": "",
  "signature": "sha256:74f8a577fe0a45a92ac9733d9b1cb7c5307d278fc815de113f015d43fb194bb5"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "heading",
     "level": 5,
     "metadata": {},
     "source": [
      "get the stocks and names of the SP500 companies"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "sp500_companies = pd.DataFrame.from_csv(\"constituents.csv\") ##used a script from the internet that scrapes spindices\n",
      "sp500_companies['Name']=sp500_companies['Name'].str.replace('Co\\.|Inc|\\.|\\s\\s|\\sCo$|\\sCorp$','') #remove the titles such as Corp and Inc"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "company_names = list(sp500_companies['Name']) #create a list for the company names"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Twitter streaming subroutines"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "\n",
      "import tweepy\n",
      "from tweepy import StreamListener\n",
      "from twitterSentiment import models\n",
      "from datetime import datetime\n",
      "from pytz import timezone\n",
      "import json, time, sys\n",
      "\n",
      "tweetsmax = 10\n",
      "tweetscount = 0\n",
      "\n",
      "def timeupdate(twitterdate):\n",
      "        # method to return a django-supported time from twitter-based time entry\n",
      "        # input comes in the following fashion: Tue Jul 02 14:33:59 +0000 2013\n",
      "        # return 2013-06-18 18:23:22-04:00\n",
      "        central = timezone('US/Central')\n",
      "        return central.localize(datetime.strptime(twitterdate, '%a %b %d %H:%M:%S +0000 %Y'))\n",
      "    \n",
      "class TwitterListener(StreamListener):\n",
      "\n",
      "    \n",
      "    \n",
      "    #ideas from http://digitalfoo.net/posts/using-python-and-tweepy-to-scrape-streaming-tweets-into-mongodb/\n",
      "    #and http://nbviewer.ipython.org/github/alexhanna/hse-twitter/blob/master/docs/Collecting%20Twitter%20data%20from%20the%20API%20with%20Python.ipynb\n",
      "    \n",
      "    def __init__(self, api = None, fprefix = 'streamer'):\n",
      "        self.api = api or API()\n",
      "        self.counter = 0\n",
      "   \n",
      "    def on_data(self, data):\n",
      "        global tweetsmax\n",
      "        global tweetscount\n",
      "        print (tweetsmax)\n",
      "        print (tweetscount)\n",
      "        if (tweetsmax == tweetscount):\n",
      "            tweetscount=0\n",
      "            return False\n",
      "        else:\n",
      "            tweetscount = tweetscount+1\n",
      "        try:\n",
      "            tweet = json.loads(data) #convert twitter stream in json into Python dictionary\n",
      "            if isinstance(tweet, dict):\n",
      "                if tweet['user']['lang'] != 'en':\n",
      "                    return\n",
      "                else:\n",
      "                    print (\"tweet: \", tweet['text'])\n",
      "                    TwitterDatabase(tweet)\n",
      "        except:\n",
      "            print (\"Error in Twitter listener. Error message:\", sys.exc_info())\n",
      "        return\n",
      "    def on_limit(self, track):\n",
      "        print(\">> limit\")\n",
      "        return \n",
      "\n",
      "    def on_error(self, status_code):\n",
      "        print(\">>> error: \", str(status_code) + \"\\n\")\n",
      "        return \n",
      "\n",
      "    def on_timeout(self):\n",
      "        print(\">>> timeout Sleeping for 60 seconds...\\n\")\n",
      "        time.sleep(60)\n",
      "        return \n",
      "\n",
      "  \n",
      "def TwitterDatabase(tweet):\n",
      "    ## take Twitter data in jsonformat and insert into the database\n",
      "    try:\n",
      "        aTweet = models.TwitterText(twitter_user_id=tweet['user']['id'], \n",
      "                                        twitter_user_name=tweet['user']['screen_name'],\n",
      "                                        twitter_text=tweet['text'], \n",
      "                                        twitter_text_id=tweet['id'], \n",
      "                                        twitter_text_timestamp=timeupdate(tweet['created_at']), \n",
      "                                        twitter_text_keyword=keywords)\n",
      "        aTweet.save()\n",
      "    except:\n",
      "        print (\"Error in Django insert tweet. error message:\", sys.exc_info())\n",
      "    return\n",
      "        \n",
      "    \n",
      "def TwitterStreaming(company_names):\n",
      "    ## twitter authentication keys\n",
      "    \n",
      "      \n",
      "\n",
      "    consumer_key        = \"yoWOau00G19Q81WKeVZ6g60zU\"\n",
      "    consumer_secret     = \"A0rJ4XlMndHv2xTeQlA2t7N9thBr3FDRu6vkrCy5ab7KAiKmNB\"\n",
      "    access_token        = \"16859687-bt1jbTlHUXO39n114gWEpg24VlKZQVbaF4AgXs4ha\"\n",
      "    access_token_secret = \"kLMge9f3GypNwHv6N9uMCuUdLS7kr5gfR5lzTXEmwMyfi\"\n",
      "    global keywords\n",
      "    keywords = [company_names]\n",
      "    try:\n",
      "        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)\n",
      "        auth.set_access_token(access_token, access_token_secret)\n",
      "        api = tweepy.API(auth)\n",
      "        \n",
      "        listener = TwitterListener(api, \"test\")\n",
      "        print (\"Begin Twitter streaming for \", keywords)\n",
      "        stream = tweepy.Stream(auth, listener)\n",
      "        print (keywords)\n",
      "        stream.filter(track=company_names, follow=\"\")\n",
      "    except:\n",
      "        print (\"Error in Twitter streaming\",sys.exc_info())\n",
      "    return True"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 5
    },
    {
     "cell_type": "heading",
     "level": 6,
     "metadata": {},
     "source": [
      "Pick 10 companies to build the Twitter search keyword "
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "company_keyword_count = 10\n",
      "company_random = random.choice(company_names, company_keyword_count)\n",
      "companieslist = \",\".join(\"\\\"\"+str(companies.rstrip())+\"\\\"\" for companies in company_random)\n",
      "print (companieslist)\n",
      "\n",
      "companieslist = ['Procter & Gamble']"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "\"Loews Corp\",\"Autodesk\",\"Computer Sciences Corp\",\"National Oilwell Varco\",\"International Bus Machines\",\"Carnival Corp\",\"Chesapeake Energy\",\"Crown Castle International Corp\",\"FirstEnergy\",\"Adobe Systems\"\n"
       ]
      }
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "TwitterStreaming(companieslist)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}